library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.1
## Warning: package 'lubridate' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lavaan)
## This is lavaan 0.6-16
## lavaan is FREE software! Please report any bugs.
library(semTools)
##  
## ###############################################################################
## This is semTools 0.5-6
## All users of R (or SEM) are invited to submit functions or ideas for functions.
## ###############################################################################
## 
## Attaching package: 'semTools'
## 
## The following object is masked from 'package:readr':
## 
##     clipboard
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.1
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Load pairwise item statistics (empirical Pearson correlation + LLM embedding
# cosine similarity) and item metadata (instrument/scale/subscale assignments).
# NOTE(review): the first path lacks a ".feather" suffix — confirm the file name.
holdout <- arrow::read_feather("ignore.data-holdout-set-item-similarity-20230710-164559")
llm_holdout_meta <- arrow::read_feather("ignore.llmdata-holdout-set-item-similarity-20230710-164559.feather")
# Attach metadata for both items of each pair; the joins match on the renamed
# ItemStemIdA/ItemStemIdB keys (see the `Joining with` messages below).
holdout_real <- holdout %>% 
  select(ItemStemIdA, ItemStemIdB, Pearson) %>% 
  left_join(llm_holdout_meta %>% select(ItemStemIdA = ItemStemId, VariableA = Variable, InstrumentA = instrument, ScaleA = scale_0, SubscaleA = scale_1)) %>% 
  left_join(llm_holdout_meta %>% select(ItemStemIdB = ItemStemId, VariableB = Variable, InstrumentB = instrument, ScaleB = scale_0, SubscaleB = scale_1))
## Joining with `by = join_by(ItemStemIdA)`
## Joining with `by = join_by(ItemStemIdB)`
# Same join, but carrying the LLM cosine similarity instead of the Pearson r.
holdout_llm <- holdout %>% 
  select(ItemStemIdA, ItemStemIdB, CosineSimilarity) %>% 
  left_join(llm_holdout_meta %>% select(ItemStemIdA = ItemStemId, VariableA = Variable, InstrumentA = instrument, ScaleA = scale_0, SubscaleA = scale_1)) %>% 
  left_join(llm_holdout_meta %>% select(ItemStemIdB = ItemStemId, VariableB = Variable, InstrumentB = instrument, ScaleB = scale_0, SubscaleB = scale_1))
## Joining with `by = join_by(ItemStemIdA)`
## Joining with `by = join_by(ItemStemIdB)`
# Build symmetric item x item matrices from the long pairwise table by
# round-tripping through an undirected igraph graph; the edge attribute `r`
# becomes the cell value.
# NOTE(review): item pairs absent from the pairwise table end up as 0 in the
# adjacency matrix — verify that all within-scale pairs are present.
cors_llm <- holdout_llm %>% 
  drop_na(InstrumentA, ScaleA, InstrumentB, ScaleB) %>% 
  select(x = VariableA, y = VariableB, r = CosineSimilarity) %>% 
  as.data.frame() |> 
  igraph::graph_from_data_frame(directed = FALSE) |> 
  igraph::as_adjacency_matrix(attr = "r", sparse = FALSE)
# Self-similarity is not in the pairwise table; set the diagonal explicitly.
diag(cors_llm) <- 1

# Same construction for the empirical Pearson correlations.
cors_real <- holdout_real %>% 
  drop_na(InstrumentA, ScaleA, InstrumentB, ScaleB) %>% 
  select(x = VariableA, y = VariableB, r = Pearson) %>% 
  as.data.frame() |> 
  igraph::graph_from_data_frame(directed = FALSE) |> 
  igraph::as_adjacency_matrix(attr = "r", sparse = FALSE)
diag(cors_real) <- 1

# One row per (instrument, top-level scale): the member items and a lavaan
# one-factor model string ("scale =~ item1 + item2 + ...").
scales <- llm_holdout_meta %>% 
  drop_na(instrument, scale_0) %>% 
  mutate(scale = str_replace_all(paste(instrument, scale_0), "[^a-zA-Z_0-9]", "_")) %>% 
  group_by(scale) %>% 
  summarise(
    items = list(Variable),
    lvn = paste(first(scale), " =~ ", paste(Variable, collapse = " + "))) %>% 
  drop_na()

# Baseline: 100 "scales" of randomly sampled items (size ~ Poisson(10)).
# NOTE(review): rpois() can return 0 or 1, yielding degenerate scales; sampling
# is unseeded, so this section is not reproducible across runs.
random_scales <- list()
for(i in 1:100) {
  n_items <- rpois(1, 10)
  random_scales[[i]] <- llm_holdout_meta %>% 
    drop_na(instrument, scale_0) %>% 
    sample_n(n_items) %>%  
    mutate(scale = paste0("random", i)) %>% 
  group_by(scale) %>% 
  summarise(
    items = list(Variable),
    lvn = paste(first(scale), " =~ ", paste(Variable, collapse = " + "))) %>% 
  drop_na()
}
random_scales <- bind_rows(random_scales)
scales <- bind_rows(scales, random_scales)

# Heuristically identify reverse-keyed items in a correlation matrix: an item
# whose mean correlation with all *other* items falls below the threshold is
# flagged as likely reverse-coded.
#
# @param rs Square correlation matrix with item names as dimnames.
# @return Character vector of item names judged to be reverse-keyed.
find_reverse_items <- function(rs) {
  # Mean off-diagonal correlation per item. Excluding the diagonal by position
  # (rather than dropping values equal to 1, as `x[-which(x == 1)]` would) is
  # robust: it never drops off-diagonal correlations that happen to equal 1,
  # and never empties the row when no value equals 1 (a negative index built
  # from an empty which() selects *nothing*, yielding NaN means).
  mean_correlations <- (rowSums(rs) - diag(rs)) / (ncol(rs) - 1)

  # Items with a clearly negative mean correlation are likely reverse-coded.
  # You may adjust the threshold according to your specific criteria.
  threshold <- -0.01
  names(which(mean_correlations < threshold))
}

# Flag items as reverse-keyed when they correlate negatively with the first
# item of the scale (the first item anchors the keying direction).
#
# @param rs Square correlation matrix with item names as dimnames.
# @return Character vector of item names negatively correlated with item 1.
find_reverse_items_by_first_item <- function(rs) {
  # Keep matrix structure with drop = FALSE so dimnames survive even for
  # 2-item scales: plain `rs[-1, 1]` returns an unnamed scalar there, making
  # names() NULL and silently disabling reverse-item detection.
  items <- rs[-1, 1, drop = FALSE]
  rownames(items)[which(items < 0)]
}

# Flip the keying of the given items in a correlation matrix: negate the
# item's row and column, then restore its self-correlation (diagonal) to 1.
# Items are processed one at a time so a pair of reversed items ends up with
# its mutual correlation at the original sign (flipped twice).
reverse_items <- function(rs, reverse_keyed_items) {
  for (rev_item in reverse_keyed_items) {
    # Locate the item by row name.
    idx <- which(rownames(rs) == rev_item)

    # Negate every correlation involving this item.
    rs[idx, ] <- -rs[idx, ]
    rs[, idx] <- -rs[, idx]

    # The diagonal entry must stay 1 after the double negation.
    rs[idx, idx] <- 1
  }
  rs
}


# Per scale: extract the item submatrices, detect reverse-keyed items from the
# empirical matrix, flip them in both matrices, then fit a one-factor CFA to
# each and compute model-based composite reliability.
# NOTE(review): the column created here is named `reverse_items`, shadowing the
# function of the same name used inside the same mutate(); R still resolves the
# call to the function (the column is not a function), but consider renaming.
scales <- scales %>% 
  rowwise() %>% 
  mutate(r_real = list(cors_real[items, items]),
         r_llm = list(cors_llm[items, items])) %>% 
  mutate(reverse_items = list(find_reverse_items_by_first_item(r_real)),
         r_real_rev = list(reverse_items(r_real, reverse_items)),
         r_llm_rev = list(reverse_items(r_llm, reverse_items))) %>% 
  mutate(
    # sample.nobs is arbitrary: only the correlation structure matters here.
    cfa_real = list(cfa(lvn, sample.cov = r_real_rev, sample.nobs = 1000)),
    rel_real = semTools::compRelSEM(cfa_real)) %>%
  mutate(
    cfa_llm = list(cfa(lvn, sample.cov = r_llm_rev, sample.nobs = 1000)),
    rel_llm = semTools::compRelSEM(cfa_llm))
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `cfa_real = list(cfa(lvn, sample.cov = r_real_rev, sample.nobs =
##   1000))`.
## ℹ In row 35.
## Caused by warning in `lavaan::lavaan()`:
## ! lavaan WARNING:
##     the optimizer (NLMINB) claimed the model converged, but not all
##     elements of the gradient are (near) zero; the optimizer may not
##     have found a local solution use check.gradient = FALSE to skip
##     this check.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
## Warning: There were 10 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `cfa_llm = list(cfa(lvn, sample.cov = r_llm_rev, sample.nobs =
##   1000))`.
## ℹ In row 37.
## Caused by warning in `lavaan::lavaan()`:
## ! lavaan WARNING:
##     the optimizer warns that a solution has NOT been found!
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 9 remaining warnings.
# Drop scales with inadmissible reliability estimates (outside [0, 1], e.g.
# non-converged solutions), then correlate the two reliability estimates.
scales <- scales %>% filter(between(as.vector(rel_llm), 0, 1)) %>% filter(between(as.vector(rel_real), 0, 1))
cor.test(scales$rel_llm, scales$rel_real)
## 
##  Pearson's product-moment correlation
## 
## data:  scales$rel_llm and scales$rel_real
## t = 13.447, df = 120, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6930576 0.8376234
## sample estimates:
##       cor 
## 0.7753024
# scales %>% filter(scale == "LOT_Optimism") %>% pull(cfa_real) %>% .[[1]] %>% summary
# Item-pair level: how well does cosine similarity track the empirical r?
broom::tidy(cor.test(holdout$CosineSimilarity, holdout$Pearson))
## # A tibble: 1 × 8
##   estimate statistic p.value parameter conf.low conf.high method     alternative
##      <dbl>     <dbl>   <dbl>     <int>    <dbl>     <dbl> <chr>      <chr>      
## 1    0.697      293.       0     90949    0.694     0.700 Pearson's… two.sided
# Scatter of LLM similarity vs. empirical correlation for all item pairs;
# the dashed identity line marks perfect agreement. Item texts become hover
# labels in the interactive (plotly) version.
holdout %>% 
  mutate(items = str_c(ItemStemTextA, "\n", ItemStemTextB)) %>% 
ggplot(., aes(CosineSimilarity, Pearson, label = items)) + 
  geom_abline(linetype = "dashed") +
  geom_point(size = 0.1, alpha = 0.1) +
  scale_color_viridis_d(guide = "none", end = 0.8) +
  ylab("Actual correlation") +
  theme_bw() +
  xlab("LLM cosine similarity") +
  ggtitle("Predicting correlations from LLMs, r = 0.7") +
  guides(color = "none") +
  coord_fixed(xlim = c(-1,1), ylim = c(-1,1)) -> p

p

ggplotly(p)
# Actual vs. LLM-estimated reliability per scale; random item sets are
# distinguished by color via str_detect(scale, "random").
scales %>% 
  mutate(rel_real = round(rel_real, 2)) %>% 
  mutate(rel_llm = round(rel_llm, 2)) %>% 
ggplot(., aes(rel_real, rel_llm, label = scale, color  =  str_detect(scale, "random"))) + 
  geom_abline(linetype = "dashed") +
  geom_point() +
  scale_color_viridis_d(guide = "none", end = 0.8) +
  xlab("Actual reliability") +
  ylab("LLM-estimated reliability") +
  guides(color = "none") +
  coord_fixed(xlim = c(0,1), ylim = c(0,1)) -> p

ggplotly(p)

# Subscales ----

# Rebuild both matrices at subscale granularity: collapse scale + subscale
# into one label before filtering. coalesce() keeps the plain scale label
# when the subscale is NA (str_c with an NA yields NA).
cors_llm <- holdout_llm %>% 
  mutate(ScaleA = coalesce(str_c(ScaleA, SubscaleA), ScaleA)) %>% 
  mutate(ScaleB = coalesce(str_c(ScaleB, SubscaleB), ScaleB)) %>% 
  drop_na(InstrumentA, ScaleA, InstrumentB, ScaleB) %>% 
  select(x = VariableA, y = VariableB, r = CosineSimilarity) %>% 
  as.data.frame() |> 
  igraph::graph_from_data_frame(directed = FALSE) |> 
  igraph::as_adjacency_matrix(attr = "r", sparse = FALSE)
diag(cors_llm) <- 1

cors_real <- holdout_real %>% 
  mutate(ScaleA = coalesce(str_c(ScaleA, SubscaleA), ScaleA)) %>% 
  mutate(ScaleB = coalesce(str_c(ScaleB, SubscaleB), ScaleB)) %>% 
  drop_na(InstrumentA, ScaleA, InstrumentB, ScaleB) %>% 
  select(x = VariableA, y = VariableB, r = Pearson) %>% 
  as.data.frame() |> 
  igraph::graph_from_data_frame(directed = FALSE) |> 
  igraph::as_adjacency_matrix(attr = "r", sparse = FALSE)
diag(cors_real) <- 1

# Subscale-level scale definitions plus 30 random baseline scales: same
# construction as above, at (scale_0 + scale_1) granularity.
scales <- llm_holdout_meta %>% 
  mutate(scale_0 = coalesce(str_c(scale_0, scale_1), scale_0)) %>% 
  drop_na(instrument, scale_0) %>% 
  mutate(scale = str_replace_all(paste(instrument, scale_0), "[^a-zA-Z_0-9]", "_")) %>% 
  group_by(scale) %>% 
  summarise(
    items = list(Variable),
    lvn = paste(first(scale), " =~ ", paste(Variable, collapse = " + "))) %>% 
  drop_na()

# NOTE(review): unseeded sampling; rpois() may return 0 or 1 items.
random_scales <- list()
for(i in 1:30) {
  n_items <- rpois(1, 10)
  random_scales[[i]] <- llm_holdout_meta %>% 
    drop_na(instrument, scale_0) %>% 
    sample_n(n_items) %>%  
    mutate(scale = paste0("random", i)) %>% 
  group_by(scale) %>% 
  summarise(
    items = list(Variable),
    lvn = paste(first(scale), " =~ ", paste(Variable, collapse = " + "))) %>% 
  drop_na()
}
random_scales <- bind_rows(random_scales)
scales <- bind_rows(scales, random_scales)

# Heuristically identify reverse-keyed items in a correlation matrix: an item
# whose mean correlation with all *other* items falls below the threshold is
# flagged as likely reverse-coded.
#
# @param rs Square correlation matrix with item names as dimnames.
# @return Character vector of item names judged to be reverse-keyed.
find_reverse_items <- function(rs) {
  # Mean off-diagonal correlation per item. Excluding the diagonal by position
  # (rather than dropping values equal to 1, as `x[-which(x == 1)]` would) is
  # robust: it never drops off-diagonal correlations that happen to equal 1,
  # and never empties the row when no value equals 1 (a negative index built
  # from an empty which() selects *nothing*, yielding NaN means).
  mean_correlations <- (rowSums(rs) - diag(rs)) / (ncol(rs) - 1)

  # Items with a clearly negative mean correlation are likely reverse-coded.
  # You may adjust the threshold according to your specific criteria.
  threshold <- -0.01
  names(which(mean_correlations < threshold))
}

# Flag items as reverse-keyed when they correlate negatively with the first
# item of the scale (the first item anchors the keying direction).
#
# @param rs Square correlation matrix with item names as dimnames.
# @return Character vector of item names negatively correlated with item 1.
find_reverse_items_by_first_item <- function(rs) {
  # Keep matrix structure with drop = FALSE so dimnames survive even for
  # 2-item scales: plain `rs[-1, 1]` returns an unnamed scalar there, making
  # names() NULL and silently disabling reverse-item detection.
  items <- rs[-1, 1, drop = FALSE]
  rownames(items)[which(items < 0)]
}

# Flip the keying of the given items in a correlation matrix: negate the
# item's row and column, then restore its self-correlation (diagonal) to 1.
# Items are processed one at a time so a pair of reversed items ends up with
# its mutual correlation at the original sign (flipped twice).
reverse_items <- function(rs, reverse_keyed_items) {
  for (rev_item in reverse_keyed_items) {
    # Locate the item by row name.
    idx <- which(rownames(rs) == rev_item)

    # Negate every correlation involving this item.
    rs[idx, ] <- -rs[idx, ]
    rs[, idx] <- -rs[, idx]

    # The diagonal entry must stay 1 after the double negation.
    rs[idx, idx] <- 1
  }
  rs
}


# Subscale-level version of the per-scale pipeline: extract submatrices,
# reverse detected items, fit one-factor CFAs to both matrices, and compute
# model-based composite reliability for each.
# NOTE(review): as above, the `reverse_items` column shadows the function of
# the same name inside this mutate(); R resolves the call correctly, but the
# shadowing is confusing.
scales <- scales %>% 
  rowwise() %>% 
  mutate(r_real = list(cors_real[items, items]),
         r_llm = list(cors_llm[items, items])) %>% 
  mutate(reverse_items = list(find_reverse_items_by_first_item(r_real)),
         r_real_rev = list(reverse_items(r_real, reverse_items)),
         r_llm_rev = list(reverse_items(r_llm, reverse_items))) %>% 
  mutate(
    cfa_real = list(cfa(lvn, sample.cov = r_real_rev, sample.nobs = 1000)),
    rel_real = semTools::compRelSEM(cfa_real)) %>%
  mutate(
    cfa_llm = list(cfa(lvn, sample.cov = r_llm_rev, sample.nobs = 1000)),
    rel_llm = semTools::compRelSEM(cfa_llm))
## Warning: There were 7 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `cfa_real = list(cfa(lvn, sample.cov = r_real_rev, sample.nobs =
##   1000))`.
## ℹ In row 41.
## Caused by warning in `lav_model_vcov()`:
## ! lavaan WARNING:
##     Could not compute standard errors! The information matrix could
##     not be inverted. This may be a symptom that the model is not
##     identified.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 6 remaining warnings.
## Warning: There were 8 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `cfa_llm = list(cfa(lvn, sample.cov = r_llm_rev, sample.nobs =
##   1000))`.
## ℹ In row 13.
## Caused by warning in `lavaan::lavaan()`:
## ! lavaan WARNING:
##     the optimizer warns that a solution has NOT been found!
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 7 remaining warnings.
# Keep only admissible reliability estimates (inside [0, 1]) before comparing.
scales <- scales %>% filter(between(as.vector(rel_llm), 0, 1)) %>% filter(between(as.vector(rel_real), 0, 1))
cor.test(scales$rel_llm, scales$rel_real)
## 
##  Pearson's product-moment correlation
## 
## data:  scales$rel_llm and scales$rel_real
## t = 10.114, df = 73, p-value = 1.572e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6495444 0.8444482
## sample estimates:
##       cor 
## 0.7638919
# scales %>% filter(scale == "LOT_Optimism") %>% pull(cfa_real) %>% .[[1]] %>% summary
# Subscale-level reliability agreement plot (interactive, hover = scale name).
scales %>% 
  mutate(rel_real = round(rel_real, 2)) %>% 
  mutate(rel_llm = round(rel_llm, 2)) %>% 
ggplot(., aes(rel_real, rel_llm, label = scale)) + 
  geom_abline(linetype = "dashed") +
  geom_point() +
  coord_fixed(xlim = c(0,1), ylim = c(0,1)) -> p

ggplotly(p)

# Within-Scale ----

# Pair table carrying both metrics plus metadata for both items of each pair.
holdout_both <- holdout %>% 
  select(ItemStemIdA, ItemStemIdB, Pearson, CosineSimilarity) %>% 
  left_join(llm_holdout_meta %>% select(ItemStemIdA = ItemStemId, VariableA = Variable, InstrumentA = instrument, ScaleA = scale_0, SubscaleA = scale_1)) %>% 
  left_join(llm_holdout_meta %>% select(ItemStemIdB = ItemStemId, VariableB = Variable, InstrumentB = instrument, ScaleB = scale_0, SubscaleB = scale_1))
## Joining with `by = join_by(ItemStemIdA)`
## Joining with `by = join_by(ItemStemIdB)`
# Agreement between the two metrics, split by whether the pair shares a
# scale/instrument; if_else's third (missing =) argument maps NA comparisons
# to 0, i.e. treats them as "different".
holdout_both %>% 
  mutate(same_scale = if_else(ScaleA == ScaleB & SubscaleA == SubscaleB, 1,0,0),
         same_instrument = if_else(InstrumentA == InstrumentB, 1, 0,0)) %>% 
  group_by(same_scale, same_instrument) %>% 
  summarise(cor(Pearson, CosineSimilarity))
## `summarise()` has grouped output by 'same_scale'. You can override using the
## `.groups` argument.
## # A tibble: 4 × 3
## # Groups:   same_scale [2]
##   same_scale same_instrument `cor(Pearson, CosineSimilarity)`
##        <dbl>           <dbl>                            <dbl>
## 1          0               0                            0.688
## 2          0               1                            0.722
## 3          1               0                            0.878
## 4          1               1                            0.730
# Same split, but on absolute values (agreement ignoring keying direction).
holdout_both %>% 
  mutate(same_scale = if_else(ScaleA == ScaleB & SubscaleA == SubscaleB, 1,0,0),
         same_instrument = if_else(InstrumentA == InstrumentB, 1, 0,0)) %>% 
  group_by(same_scale, same_instrument) %>% 
  summarise(cor(abs(Pearson), abs(CosineSimilarity)))
## `summarise()` has grouped output by 'same_scale'. You can override using the
## `.groups` argument.
## # A tibble: 4 × 3
## # Groups:   same_scale [2]
##   same_scale same_instrument `cor(abs(Pearson), abs(CosineSimilarity))`
##        <dbl>           <dbl>                                      <dbl>
## 1          0               0                                      0.508
## 2          0               1                                      0.592
## 3          1               0                                      0.417
## 4          1               1                                      0.683
# Compute agreement within each scale first, then take the unweighted mean of
# those per-scale correlations within each group.
# NOTE(review): prefer TRUE over T for na.rm (T is reassignable).
holdout_both %>% 
  mutate(same_scale = if_else(ScaleA == ScaleB & SubscaleA == SubscaleB, 1,0,0),
         same_instrument = if_else(InstrumentA == InstrumentB, 1, 0,0)) %>% 
  group_by(same_scale, same_instrument, ScaleA, SubscaleA, InstrumentA) %>% 
  summarise(cor = cor(Pearson, CosineSimilarity)) %>% 
  group_by(same_scale, same_instrument) %>% 
  summarise(mean(cor, na.rm = T))
## `summarise()` has grouped output by 'same_scale', 'same_instrument', 'ScaleA',
## 'SubscaleA'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'same_scale'. You can override using the
## `.groups` argument.
## # A tibble: 4 × 3
## # Groups:   same_scale [2]
##   same_scale same_instrument `mean(cor, na.rm = T)`
##        <dbl>           <dbl>                  <dbl>
## 1          0               0                  0.621
## 2          0               1                  0.569
## 3          1               0                  0.644
## 4          1               1                  0.437

# Scales ----

# Rebuild both matrices at subscale granularity (same construction as in the
# Subscales section); needed again because the intervening code reassigned
# cors_llm/cors_real.
cors_llm <- holdout_llm %>% 
  mutate(ScaleA = coalesce(str_c(ScaleA, SubscaleA), ScaleA)) %>% 
  mutate(ScaleB = coalesce(str_c(ScaleB, SubscaleB), ScaleB)) %>% 
  drop_na(InstrumentA, ScaleA, InstrumentB, ScaleB) %>% 
  select(x = VariableA, y = VariableB, r = CosineSimilarity) %>% 
  as.data.frame() |> 
  igraph::graph_from_data_frame(directed = FALSE) |> 
  igraph::as_adjacency_matrix(attr = "r", sparse = FALSE)
diag(cors_llm) <- 1

cors_real <- holdout_real %>% 
  mutate(ScaleA = coalesce(str_c(ScaleA, SubscaleA), ScaleA)) %>% 
  mutate(ScaleB = coalesce(str_c(ScaleB, SubscaleB), ScaleB)) %>% 
  drop_na(InstrumentA, ScaleA, InstrumentB, ScaleB) %>% 
  select(x = VariableA, y = VariableB, r = Pearson) %>% 
  as.data.frame() |> 
  igraph::graph_from_data_frame(directed = FALSE) |> 
  igraph::as_adjacency_matrix(attr = "r", sparse = FALSE)
diag(cors_real) <- 1

# Scale definitions at subscale granularity (no random scales this time).
scales <- llm_holdout_meta %>% 
  mutate(scale_0 = coalesce(str_c(scale_0, scale_1), scale_0)) %>% 
  drop_na(instrument, scale_0) %>% 
  mutate(scale = str_replace_all(paste(instrument, scale_0), "[^a-zA-Z_0-9]", "_")) %>% 
  group_by(scale) %>% 
  summarise(
    items = list(Variable),
    lvn = paste(first(scale), " =~ ", paste(Variable, collapse = " + "))) %>% 
  drop_na()

# Detect reverse-keyed items per scale from the empirical submatrix.
scales <- scales %>% 
  rowwise() %>% 
  mutate(r_real = list(cors_real[items, items]),
         r_llm = list(cors_llm[items, items])) %>% 
  mutate(reverse_items = list(find_reverse_items_by_first_item(r_real)))

# Apply all detected reversals to the full matrices at once.
# NOTE(review): unlist() can contain duplicate item names (an item may belong
# to several scales); reverse_items() flips a duplicated item twice, undoing
# the reversal — consider wrapping in unique().
cors_real_rev = reverse_items(cors_real, scales$reverse_items %>% unlist())
cors_llm_rev = reverse_items(cors_llm, scales$reverse_items %>% unlist())


# Joint CFA over the first 20 scales: estimates latent scale
# intercorrelations from each (reversed) matrix.
model <- scales$lvn %>% head(20) %>% paste(collapse = "\n")

lcor_real <- cfa(model, 
            sample.cov = cors_real_rev, sample.nobs = 1000)
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.
lcor_llm <- cfa(model, 
            sample.cov = cors_llm_rev, sample.nobs = 1000)
## Warning in lavaan::lavaan(model = model, sample.cov = cors_llm_rev, sample.nobs = 1000, : lavaan WARNING:
##     the optimizer (NLMINB) claimed the model converged, but not all
##     elements of the gradient are (near) zero; the optimizer may not
##     have found a local solution use check.gradient = FALSE to skip
##     this check.
# Join the two standardized solutions on parameter identity:
# est.std.x = empirical estimate, est.std.y = LLM-based estimate.
estimated_rs <- standardizedsolution(lcor_real) %>% 
  full_join(standardizedsolution(lcor_llm), by = c("lhs", "op", "rhs")) %>% 
  select(lhs, op, rhs, est.std.x, est.std.y) 
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = object@SampleStats, : lavaan WARNING:
##     Could not compute standard errors! The information matrix could
##     not be inverted. This may be a symptom that the model is not
##     identified.
# Keep covariance rows between distinct variables (the latent correlations);
# lhs != rhs drops the variance rows.
lv_rs <- estimated_rs %>% filter(op == "~~") %>% 
  filter(lhs != rhs)


# Agreement of latent scale correlations across the two sources.
cor.test(lv_rs$est.std.x, lv_rs$est.std.y)
## 
##  Pearson's product-moment correlation
## 
## data:  lv_rs$est.std.x and lv_rs$est.std.y
## t = 27.541, df = 188, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8627924 0.9202752
## sample estimates:
##       cor 
## 0.8951964
# Scatter of latent scale correlations, empirical vs. LLM-based.
# (Axis labels are German: "latent scale correlation, empirical / LLM".)
lv_rs %>% 
  mutate(correlation = round(est.std.x, 2)) %>% 
  mutate(llm_based = round(est.std.y, 2)) %>% 
  mutate(scales = str_c(lhs, "\n", rhs)) %>% 
ggplot(., aes(correlation, llm_based, label = scales)) + 
  geom_abline(linetype = "dashed") +
  geom_point() +
  xlab("Latente Skalenkorrelation, empirisch") +
  ylab("Latente Skalenkorrelation, LLM") + 
  theme_bw() +
  coord_fixed(xlim = c(-1,1), ylim = c(-1,1)) -> p
p

ggplotly(p)

# Shuffle on through ----

# Repeat the latent-correlation comparison for 30 random batches of 10 scales
# each, accumulating the latent covariance rows into lv_rs.
# NOTE(review): sampling is unseeded, and batches can overlap, producing
# duplicate (lhs, rhs) pairs — deduplicated below via distinct().
for(i in 1:30) {
  model <- scales$lvn %>% sample(10) %>% paste(collapse = "\n")
  
  lcor_real <- cfa(model, 
              sample.cov = cors_real_rev, sample.nobs = 1000)
  lcor_llm <- cfa(model, 
              sample.cov = cors_llm_rev, sample.nobs = 1000)
  
  estimated_rs <- standardizedsolution(lcor_real) %>% 
    full_join(standardizedsolution(lcor_llm), by = c("lhs", "op", "rhs")) %>% 
    select(lhs, op, rhs, est.std.x, est.std.y) 
  
  # Append this batch's latent correlations (variance rows excluded).
  lv_rs <- bind_rows(lv_rs,
                     estimated_rs %>% filter(op == "~~") %>% 
                       filter(lhs != rhs)
  )
}
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.

## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.
## Warning in lavaan::lavaan(model = model, sample.cov = cors_llm_rev, sample.nobs = 1000, : lavaan WARNING:
##     the optimizer warns that a solution has NOT been found!
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = object@SampleStats, : lavaan WARNING:
##     Could not compute standard errors! The information matrix could
##     not be inverted. This may be a symptom that the model is not
##     identified.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.
## Warning in lavaan::lavaan(model = model, sample.cov = cors_llm_rev, sample.nobs = 1000, : lavaan WARNING:
##     the optimizer (NLMINB) claimed the model converged, but not all
##     elements of the gradient are (near) zero; the optimizer may not
##     have found a local solution use check.gradient = FALSE to skip
##     this check.
## Warning in lavaan::lavaan(model = model, sample.cov = cors_llm_rev, sample.nobs = 1000, : lavaan WARNING:
##     the optimizer warns that a solution has NOT been found!
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = object@SampleStats, : lavaan WARNING:
##     Could not compute standard errors! The information matrix could
##     not be inverted. This may be a symptom that the model is not
##     identified.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.

## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
## Warning in lavaan::lavaan(model = model, sample.cov = cors_llm_rev, sample.nobs = 1000, : lavaan WARNING:
##     the optimizer warns that a solution has NOT been found!
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = object@SampleStats, : lavaan WARNING:
##     Could not compute standard errors! The information matrix could
##     not be inverted. This may be a symptom that the model is not
##     identified.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.

## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.

## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.
## Warning in lavaan::lavaan(model = model, sample.cov = cors_llm_rev, sample.nobs = 1000, : lavaan WARNING:
##     the optimizer (NLMINB) claimed the model converged, but not all
##     elements of the gradient are (near) zero; the optimizer may not
##     have found a local solution use check.gradient = FALSE to skip
##     this check.
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = object@SampleStats, : lavaan WARNING:
##     Could not compute standard errors! The information matrix could
##     not be inverted. This may be a symptom that the model is not
##     identified.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.

## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
##                 is not positive definite;
##                 use lavInspect(fit, "cov.lv") to investigate.
# Deduplicate scale pairs (keep the first estimate per pair), then test
# overall agreement across all sampled batches.
# NOTE(review): prefer TRUE over T for .keep_all.
lv_rsd <- lv_rs %>% distinct(lhs, rhs, .keep_all = T)



cor.test(lv_rsd$est.std.x, lv_rsd$est.std.y)
## 
##  Pearson's product-moment correlation
## 
## data:  lv_rsd$est.std.x and lv_rsd$est.std.y
## t = 60.214, df = 1108, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8606573 0.8882753
## sample estimates:
##       cor 
## 0.8751774